# smacof analysis of 1996 wine data
# Lew Harvey
# 7 February 2016
library("smacof")
library("rgl")
library("psych")
rm(list = ls())
Read in the data and wine properties
fn <- "wine_96(11Ss).csv"
df <- read.csv(fn, header = TRUE)
# read in wine properties
df.prop <- read.csv("WineQuality2_96_Data.csv", header = TRUE)
df.prop$type <- c(rep("yellow", 5), rep("red", 5))
The data in the input file looks like this for the first subject:
head(df, 10)
## wa wb wc wd we wf wg wh wi wj
## 1 0 NA NA NA NA NA NA NA NA NA
## 2 5 0 NA NA NA NA NA NA NA NA
## 3 5 6 0 NA NA NA NA NA NA NA
## 4 3 5 4 0 NA NA NA NA NA NA
## 5 4 4 7 2 0 NA NA NA NA NA
## 6 7 8 7 8 7 0 NA NA NA NA
## 7 5 4 6 4 8 6 0 NA NA NA
## 8 7 5 9 8 6 7 2 0 NA NA
## 9 4 4 8 8 6 4 6 7 0 NA
## 10 8 7 8 4 5 6 3 6 3 0
Convert the data into a list of matrices, one for each subject. This is a bit more complicated than it should be, but the original data are in an SPSS file that requires some massaging to get the data into a form for R
# loop through the data frame to
# create separate matrices for the
# data sets of each subject
nstim <- ncol(df)
nsets <- nrow(df) / ncol(df)
mlist <- list(NA)
mm <- list(NA)
colnames(df) <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
winename <- colnames(df)
for (i in 1:nsets){
mm[i] <- paste("m", i, sep = "")
i1 <- ((i - 1) * nstim) + 1
i2 <- i1 + nstim - 1
mlist[i] <- list(mm = as.matrix(df[i1:i2,]))
row.names(mlist[[i]]) <- winename
mlist[[i]] <- as.dist(mlist[[i]])
}
names(mlist) <- unlist(mm)
Now the matrices are in a list, one for each subject. Here are the first subject’s data as a distance matrix:
mlist[1]
## $m1
## A B C D E F G H I
## B 5
## C 5 6
## D 3 5 4
## E 4 4 7 2
## F 7 8 7 8 7
## G 5 4 6 4 8 6
## H 7 5 9 8 6 7 2
## I 4 4 8 8 6 4 6 7
## J 8 7 8 4 5 6 3 6 3
wine2D <- smacofIndDiff(mlist, ndim = 2,
type = "ordinal",
constraint = "indscal",
itmax = 1000)
wine2D
##
## Call: smacofIndDiff(delta = mlist, ndim = 2, type = "ordinal", constraint = "indscal",
## itmax = 1000)
##
## Model: Three-way SMACOF
## Number of objects: 10
## Stress-1 value: 0.209
## Number of iterations: 329
par(mfcol = c(2, 2))
par(pty = "s")
plot(wine2D, type = "p",
plot.type = "confplot",
label.conf = list(TRUE, 1, 1),
xlim = c(-0.9, 0.9),
ylim = c(-0.9, 0.9))
abline(h = 0, col = "gray")
abline(v = 0, col = "gray")
par(pty = "m")
plot(wine2D, plot.type = "Shepard")
plot(wine2D, plot.type = "stressplot", ylim = c(0, 25))
plot(wine2D, plot.type = "resplot")
par(mfcol = c(1, 1))
par(pty = "s")
plot(wine2D, type = "p", pch = 19,
plot.type = "confplot",
label.conf = list(TRUE, 1, 1),
xlim = c(-0.9, 0.9),
ylim = c(-0.9, 0.9))
abline(h = 0, col = "gray")
abline(v = 0, col = "gray")
par(pty = "m")
par(pty = "s")
plot(wine2D, type = "p", pch = 19,
col = df.prop$type,
plot.type = "confplot",
label.conf = list(TRUE, 1, 1),
xlim = c(-0.9, 0.9),
ylim = c(-0.9, 0.9))
abline(h = 0, col = "gray")
abline(v = 0, col = "gray")
par(pty = "m")
Another plot we can make is the position of each stimulus in the individual subject space. These configurations are stored in the $conf attribute in the scaling results object.att (use the attributes(wine2D) command to see all the parts contained in the INDSCAL results object)
par(pty = "s")
ns <- length(wine2D$conf)
pltmax <- max(abs(unlist(wine2D$conf)))
plot(0, 0, type = "n",
xlim = c(-pltmax, pltmax),
ylim = c(-pltmax, pltmax))
# add the stimulus points from individual subject spaces
for (i in 1:ns){
points(wine2D$conf[[i]][,"D1"], wine2D$conf[[i]][,"D2"],
col = df.prop$type)}
abline(v = 0, col = "gray")
abline(h = 0, col = "gray")
par(pty = "m")
Here we plot the weights of the individual subjects on each dimension. The code for extracting the weights from the results seems complicated but basically involves converting a list of matrices into a data frame based just on the diagonals of the matrices.
sswgts2D <- t(as.data.frame(lapply(wine2D$cweights, diag)))
par(pty = "s")
plot(D2 ~ D1, data = sswgts2D,
xlim = c(0, 2),
ylim = c(0, 2),
main = "Subject weights in 2D")
abline(0, 1, col = "gray")
par(pty = "m")
wine3D <- smacofIndDiff(mlist,
ndim = 3,
type = "ordinal",
constraint = "indscal",
verbose = FALSE,
itmax = 5000)
wine3D
##
## Call: smacofIndDiff(delta = mlist, ndim = 3, type = "ordinal", constraint = "indscal",
## verbose = FALSE, itmax = 5000)
##
## Model: Three-way SMACOF
## Number of objects: 10
## Stress-1 value: 0.152
## Number of iterations: 1543
This plot cannot be printed since it is 3D and can be rotated by the user
plot3d(wine3D$gspace, type = "s",
expand = 1.3,
xlab = "X",
ylab = "Y",
zlab = "Z")
text3d(wine3D$gspace,
text = rownames(wine3D$gspace),
adj = c(2, 2))
This plot cannot be printed since it is 3D and can be rotated by the user
plot3d(wine3D$gspace, type = "s",
expand = 1.3,
col = df.prop$type,
xlab = "Wine Color",
ylab = "Y",
zlab = "Z")
text3d(wine3D$gspace,
text = rownames(wine3D$gspace),
adj = c(2, 2))
Another plot we can make is the position of each stimulus in the individual subject space. These configurations are stored in the $conf attribute in the scaling results object.att (use the attributes(wine2D) command to see all the parts contained in the INDSCAL results object)
par(pty = "s")
ns <- length(wine3D$conf)
pltmax <- max(abs(unlist(wine3D$conf)))
plot3d(0, 0, type = "n",
xlim = c(-pltmax, pltmax),
ylim = c(-pltmax, pltmax),
zlim = c(-pltmax, pltmax))
# add the stimulus points from individual subject spaces
for (i in 1:ns){
plot3d(wine3D$conf[[i]],
add = TRUE,
radius = .04,
type = "s",
col = df.prop$type)
}
par(pty = "m")
Here we plot the weights of the individual subjects on each dimension. The code for extracting the weights from the results seems complicated but basically involves converting a list of matrices into a data frame based just on the diagonals of the matrices.
sswgts3D <- t(as.data.frame(lapply(wine3D$cweights, diag)))
par(pty = "s")
plot3d(sswgts3D,
xlim = c(0, 2),
ylim = c(0, 2),
zlim = c(0, 2),
main = "Subject weights in 3D")
lines3d(xyz.coords(c(0, 2), c(0, 2), c(0, 2)), col = "gray")
par(pty = "m")
df.prop[1:10, 1:8]
## Subject Wine Body Color Aroma Fruitiness Nuttiness Sweetness
## 1 Mean Wine A 4.000 3.250 3.625 8.000 2.571 6.714
## 2 Mean Wine B 3.571 3.143 3.429 9.143 1.286 8.714
## 3 Mean Wine C 1.250 2.857 3.750 9.250 1.250 11.333
## 4 Mean Wine D 3.429 3.125 4.625 5.750 3.000 5.500
## 5 Mean Wine E 2.857 3.714 3.714 7.000 2.286 6.286
## 6 Mean Wine F 6.000 7.143 5.125 4.571 3.857 3.286
## 7 Mean Wine G 6.571 7.000 5.750 5.125 5.143 3.625
## 8 Mean Wine H 7.000 8.000 5.123 2.875 3.500 2.250
## 9 Mean Wine I 6.286 8.000 5.714 4.571 4.571 4.429
## 10 Mean Wine J 7.375 7.750 5.000 4.500 4.500 3.625
qual <- names(df.prop)[3:15]
tmp <- biplotmds(wine2D, df.prop[, qual])
op <- par(pty = "s")
plot(tmp, col = df.prop$type,
xlim = c(-2, 2),
ylim = c(-2, 2))
par(op)
tmp
##
## Call:
## lm(formula = ext ~ -1 + X)
##
## Coefficients:
## Body Color Aroma Fruitiness Nuttiness Sweetness
## D1 1.56941 1.62388 1.48247 -1.62652 1.50177 -1.59136
## D2 0.43480 -0.62716 -0.58834 0.39246 0.32249 -0.20236
## Tannic.Acid Dryness Complexity Roundness Spiciness Quality
## D1 1.56137 1.65025 1.50801 1.28171 1.26017 1.41432
## D2 0.02995 0.10981 0.22030 1.08774 -0.40636 0.87796
## Openness
## D1 0.34505
## D2 0.08177
qual <- names(df.prop)[3:15]
tmp <- biplotmds(wine3D, df.prop[, qual])
tmpt <- t(coefficients(tmp))
tmpt
## D1 D2 D3
## Body 1.7301702 -0.21723772 -0.2241366
## Color 1.7305865 -0.92643591 0.3007068
## Aroma 1.5139030 -1.57146597 -0.9948987
## Fruitiness -1.6294741 0.59845718 0.2962896
## Nuttiness 1.5752536 -0.58996641 -1.5928500
## Sweetness -1.6408350 0.06828168 0.6379175
## Tannic.Acid 1.6757069 -0.73310109 -0.7947114
## Dryness 1.7374746 -0.12277573 -0.2621727
## Complexity 1.6724328 -0.51488529 -0.3554648
## Roundness 1.5132445 0.16797693 -0.8673027
## Spiciness 1.3965937 -1.50479412 -0.4003672
## Quality 1.5686664 1.08713842 0.1210472
## Openness 0.3812162 0.26891855 0.7111767
op <- par(pty = "s")
plot(tmp, col = df.prop$type,
xlim = c(-2, 2),
ylim = c(-2, 2))
par(op)
fa.pc <- fa.parallel(df.prop[, qual], fa = "pc")
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## Parallel analysis suggests that the number of factors = NA and the number of components = 1
abline(h = 1)
pc.qual <- principal(df.prop[, qual], nfactors = 2, rotate = "varimax")
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
pc.qual
## Principal Components Analysis
## Call: principal(r = df.prop[, qual], nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 h2 u2 com
## Body 0.96 0.16 0.94 0.058 1.1
## Color 0.95 0.02 0.91 0.092 1.0
## Aroma 0.90 0.04 0.82 0.183 1.0
## Fruitiness -0.91 -0.20 0.86 0.138 1.1
## Nuttiness 0.93 0.10 0.88 0.117 1.0
## Sweetness -0.89 -0.37 0.93 0.067 1.3
## Tannic.Acid 0.99 0.01 0.99 0.010 1.0
## Dryness 0.96 0.21 0.96 0.044 1.1
## Complexity 0.97 0.03 0.94 0.063 1.0
## Roundness 0.88 0.08 0.79 0.214 1.0
## Spiciness 0.90 -0.35 0.94 0.062 1.3
## Quality 0.78 0.51 0.86 0.141 1.7
## Openness 0.01 0.93 0.87 0.129 1.0
##
## PC1 PC2
## SS loadings 10.17 1.51
## Proportion Var 0.78 0.12
## Cumulative Var 0.78 0.90
## Proportion Explained 0.87 0.13
## Cumulative Proportion 0.87 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.05
## with the empirical chi square 3.47 with prob < 1
##
## Fit based upon off diagonal values = 1